% This file was created with JabRef 2.10.
% Encoding: UTF-8

% IEEEtran.bst control entry (not a real reference). The CTL* fields below
% configure forced "et al." handling: enabled, threshold of 3 names, 2 names
% shown. See the IEEEtran BibTeX HOWTO for the exact semantics.
% NOTE(review): presumably activated from the document via \bstctlcite -- confirm.
@IEEEtranBSTCTL{IEEEexample:BSTcontrol,
CTLuse_forced_etal       = "yes",
CTLmax_names_forced_etal = "3",
CTLnames_show_etal       = "2" }


@article{abadi2016tensorflow,
  author  = {Abadi, Mart{\'\i}n and Agarwal, Ashish and Barham, Paul and Brevdo, Eugene and Chen, Zhifeng and Citro, Craig and Corrado, Greg S and Davis, Andy and Dean, Jeffrey and Devin, Matthieu and others},
  title   = {{TensorFlow}: {Large-scale} machine learning on heterogeneous distributed systems},
  journal = {arXiv preprint arXiv:1603.04467},
  year    = {2016},
  month   = mar,
  url     = {https://arxiv.org/abs/1603.04467},
}

@misc{AlexanderMordvintsev2015,
  author = {Mordvintsev, Alexander and Olah, Christopher and Tyka, Mike},
  title  = {Inceptionism: {Going} Deeper into Neural Networks},
  year   = {2015},
  month  = jun,
  url    = {https://research.googleblog.com/2015/06/inceptionism-going-deeper-into-neural.html},
}

@incollection{andrychowicz2016learning,
  author    = {Andrychowicz, Marcin and Denil, Misha and G{\'o}mez, Sergio and Hoffman, Matthew W and Pfau, David and Schaul, Tom and Shillingford, Brendan and de Freitas, Nando},
  title     = {Learning to learn by gradient descent by gradient descent},
  booktitle = {Advances in Neural Information Processing Systems 29 (NIPS)},
  editor    = {D. D. Lee and M. Sugiyama and U. V. Luxburg and I. Guyon and R. Garnett},
  pages     = {3981--3989},
  publisher = {Curran Associates, Inc.},
  year      = {2016},
  month     = mar,
  url       = {http://papers.nips.cc/paper/6461-learning-to-learn-by-gradient-descent-by-gradient-descent.pdf},
}

@inproceedings{ankerst1999optics,
  author       = {Ankerst, Mihael and Breunig, Markus M and Kriegel, Hans-Peter and Sander, J{\"o}rg},
  title        = {{OPTICS}: {Ordering} points to identify the clustering structure},
  booktitle    = {{ACM} {SIGMOD} Record},
  volume       = {28},
  number       = {2},
  pages        = {49--60},
  organization = {ACM},
  year         = {1999},
}

@article{baker2016designing,
  author  = {Baker, Bowen and Gupta, Otkrist and Naik, Nikhil and Raskar, Ramesh},
  title   = {Designing Neural Network Architectures using Reinforcement Learning},
  journal = {arXiv preprint arXiv:1611.02167},
  year    = {2016},
  month   = nov,
  url     = {https://arxiv.org/abs/1611.02167},
}

@article{Bao2017,
  author  = {Bao, Jianmin and Chen, Dong and Wen, Fang and Li, Houqiang and Hua, Gang},
  title   = {{CVAE-GAN}: {Fine}-Grained Image Generation through Asymmetric Training},
  journal = {arXiv preprint arXiv:1703.10155},
  year    = {2017},
  month   = mar,
  url     = {https://arxiv.org/abs/1703.10155},
}

@inproceedings{behmo2010towards,
  author       = {Behmo, R{\'e}gis and Marcombes, Paul and Dalalyan, Arnak and Prinet, V{\'e}ronique},
  title        = {Towards optimal naive {Bayes} nearest neighbor},
  booktitle    = {European Conference on Computer Vision (ECCV)},
  pages        = {171--184},
  organization = {Springer},
  year         = {2010},
}

@article{bengio1994learning,
  author    = {Bengio, Yoshua and Simard, Patrice and Frasconi, Paolo},
  title     = {Learning long-term dependencies with gradient descent is difficult},
  journal   = {{IEEE} Transactions on Neural Networks},
  volume    = {5},
  number    = {2},
  pages     = {157--166},
  publisher = {IEEE},
  year      = {1994},
}

% The month lives in the month field only; JMLR volumes carry no numeric issue.
@article{bergstra2012random,
  author  = {Bergstra, James and Bengio, Yoshua},
  title   = {Random search for hyper-parameter optimization},
  journal = {Journal of Machine Learning Research},
  volume  = {13},
  pages   = {281--305},
  year    = {2012},
  month   = feb,
  url     = {http://jmlr.csail.mit.edu/papers/volume13/bergstra12a/bergstra12a.pdf},
}

@techreport{bergstra2009quadratic,
  author      = {Bergstra, James and Desjardins, Guillaume and Lamblin, Pascal and Bengio, Yoshua},
  title       = {Quadratic polynomials learn better image features},
  institution = {D{\'e}partement d'Informatique et de Recherche Op{\'e}rationnelle, Universit{\'e} de Montr{\'e}al},
  number      = {1337},
  year        = {2009},
}

% Conference paper: title is the paper, booktitle the proceedings.
@inproceedings{Bodenhausen1993,
  author    = {Bodenhausen, Ulrich and Manke, Stefan},
  title     = {Automatically Structured Neural Networks For Handwritten Character And Word Recognition},
  booktitle = {International Conference on Artificial Neural Networks (ICANN)},
  editor    = {Gielen, Stan and Kappen, Bert},
  pages     = {956--961},
  publisher = {Springer London},
  address   = {London},
  year      = {1993},
  month     = sep,
  doi       = {10.1007/978-1-4471-2063-6_283},
  isbn      = {978-1-4471-2063-6},
  url       = {http://dx.doi.org/10.1007/978-1-4471-2063-6_283},
}

@inproceedings{boureau2010theoretical,
  author    = {Boureau, Y-Lan and Ponce, Jean and LeCun, Yann},
  title     = {A theoretical analysis of feature pooling in visual recognition},
  booktitle = {International Conference on Machine Learning (ICML)},
  number    = {27},
  pages     = {111--118},
  year      = {2010},
  url       = {http://yann.lecun.com/exdb/publis/pdf/boureau-icml-10.pdf},
}

% Journal is the IEE (not IEEE) Proceedings, part G.
@article{charalambous1992conjugate,
  author    = {Charalambous, Christakis},
  title     = {Conjugate gradient algorithm for efficient training of artificial neural networks},
  journal   = {{IEE} Proceedings {G} - Circuits, Devices and Systems},
  volume    = {139},
  number    = {3},
  pages     = {301--310},
  publisher = {IET},
  year      = {1992},
  url       = {http://ieeexplore.ieee.org/document/143326/},
  file      = {:home/moose/GitHub/informatik-2011/Master/Master-Arbeit/paper/conjugate-gradient.pdf:PDF},
}

@article{chetlur2014cudnn,
  author  = {Chetlur, Sharan and Woolley, Cliff and Vandermersch, Philippe and Cohen, Jonathan and Tran, John and Catanzaro, Bryan and Shelhamer, Evan},
  title   = {{cuDNN}: {Efficient} primitives for deep learning},
  journal = {arXiv preprint arXiv:1410.0759},
  year    = {2014},
  month   = oct,
  url     = {https://arxiv.org/abs/1410.0759},
}

@misc{chollet2015keras,
  author       = {Chollet, Fran{\c{c}}ois},
  title        = {Keras},
  howpublished = {\url{https://github.com/fchollet/keras}},
  publisher    = {GitHub},
  year         = {2015},
}

% Key keeps the historical misspelling so existing citations still resolve.
@inproceedings{ciregan2012multi,
  author       = {Cire{\c{s}}an, Dan and Meier, Ueli and Schmidhuber, J{\"u}rgen},
  title        = {Multi-column deep neural networks for image classification},
  booktitle    = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  pages        = {3642--3649},
  organization = {IEEE},
  year         = {2012},
  month        = feb,
  url          = {https://arxiv.org/abs/1202.2745v1},
}

@article{clevert2015fast,
  author  = {Clevert, Djork-Arn{\'e} and Unterthiner, Thomas and Hochreiter, Sepp},
  title   = {Fast and accurate deep network learning by exponential linear units ({ELUs})},
  journal = {arXiv preprint arXiv:1511.07289},
  year    = {2015},
  month   = nov,
  url     = {https://arxiv.org/abs/1511.07289},
}

@misc{STL-10,
  author = {Coates, Adam and Lee, Honglak and Ng, Andrew Y},
  title  = {{STL-10} dataset},
  year   = {2011},
  url    = {http://cs.stanford.edu/~acoates/stl10},
}

% Key keeps its historical "2010"; the record describes the AISTATS 2011
% paper that the URL points to.
@inproceedings{coates2010analysis,
  author    = {Coates, Adam and Lee, Honglak and Ng, Andrew Y},
  title     = {An analysis of single-layer networks in unsupervised feature learning},
  booktitle = {International Conference on Artificial Intelligence and Statistics (AISTATS)},
  volume    = {15},
  pages     = {215--223},
  year      = {2011},
  url       = {http://cs.stanford.edu/~acoates/papers/coatesleeng_aistats_2011.pdf},
}

@inproceedings{dai2016instance,
  author       = {Dai, Jifeng and He, Kaiming and Sun, Jian},
  title        = {Instance-aware semantic segmentation via multi-task network cascades},
  booktitle    = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  pages        = {3150--3158},
  organization = {IEEE},
  year         = {2016},
  url          = {https://arxiv.org/abs/1512.04412},
}

@article{demvsar2006statistical,
  author  = {Dem{\v{s}}ar, Janez},
  title   = {Statistical comparisons of classifiers over multiple data sets},
  journal = {Journal of Machine Learning Research},
  volume  = {7},
  pages   = {1--30},
  year    = {2006},
  month   = jan,
  url     = {http://jmlr.csail.mit.edu/papers/volume7/demsar06a/demsar06a.pdf},
}

@article{dieleman2016exploiting,
  author  = {Dieleman, Sander and De Fauw, Jeffrey and Kavukcuoglu, Koray},
  title   = {Exploiting cyclic symmetry in convolutional neural networks},
  journal = {arXiv preprint arXiv:1602.02660},
  year    = {2016},
  month   = feb,
  url     = {https://arxiv.org/abs/1602.02660},
}

@article{dieleman2015rotation,
  author    = {Dieleman, Sander and Willett, Kyle W and Dambre, Joni},
  title     = {Rotation-invariant convolutional neural networks for galaxy morphology prediction},
  journal   = {Monthly Notices of the Royal Astronomical Society},
  volume    = {450},
  number    = {2},
  pages     = {1441--1459},
  publisher = {Oxford University Press},
  year      = {2015},
}

@article{dietterich1998approximate,
  author    = {Dietterich, Thomas G},
  title     = {Approximate statistical tests for comparing supervised classification learning algorithms},
  journal   = {Neural Computation},
  volume    = {10},
  number    = {7},
  pages     = {1895--1923},
  publisher = {MIT Press},
  year      = {1998},
  month     = mar,
  doi       = {10.1162/089976698300017197},
  url       = {http://www.mitpressjournals.org/doi/abs/10.1162/089976698300017197},
}

@incollection{NIPS2014_5548,
  author    = {Dosovitskiy, Alexey and Springenberg, Jost Tobias and Riedmiller, Martin and Brox, Thomas},
  title     = {Discriminative Unsupervised Feature Learning with Convolutional Neural Networks},
  booktitle = {Advances in Neural Information Processing Systems 27 (NIPS)},
  editor    = {Z. Ghahramani and M. Welling and C. Cortes and N. D. Lawrence and K. Q. Weinberger},
  pages     = {766--774},
  publisher = {Curran Associates, Inc.},
  year      = {2014},
  url       = {http://papers.nips.cc/paper/5548-discriminative-unsupervised-feature-learning-with-convolutional-neural-networks.pdf},
}

@techreport{dozat2015incorporating,
  author      = {Dozat, Timothy},
  title       = {Incorporating {Nesterov} momentum into {Adam}},
  institution = {Stanford University},
  year        = {2015},
  url         = {http://cs229.stanford.edu/proj2015/054_report.pdf},
}

@article{duch1999survey,
  author  = {Duch, W{\l}odzis{\l}aw and Jankowski, Norbert},
  title   = {Survey of neural transfer functions},
  journal = {Neural Computing Surveys},
  volume  = {2},
  number  = {1},
  pages   = {163--212},
  year    = {1999},
  url     = {ftp://ftp.icsi.berkeley.edu/pub/ai/jagota/vol2_6.pdf},
}

@article{duchi2011adaptive,
  author  = {Duchi, John and Hazan, Elad and Singer, Yoram},
  title   = {Adaptive subgradient methods for online learning and stochastic optimization},
  journal = {Journal of Machine Learning Research},
  volume  = {12},
  pages   = {2121--2159},
  year    = {2011},
  month   = jul,
  url     = {http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf},
}

@incollection{dugas2001incorporating,
  author    = {Dugas, Charles and Bengio, Yoshua and B{\'e}lisle, Fran{\c{c}}ois and Nadeau, Claude and Garcia, Ren{\'e}},
  title     = {Incorporating Second-Order Functional Knowledge for Better Option Pricing},
  booktitle = {Advances in Neural Information Processing Systems 13 (NIPS)},
  editor    = {T. K. Leen and T. G. Dietterich and V. Tresp},
  pages     = {472--478},
  publisher = {MIT Press},
  year      = {2001},
  url       = {http://papers.nips.cc/paper/1920-incorporating-second-order-functional-knowledge-for-better-option-pricing.pdf},
}

@book{eiben2003introduction,
  author    = {Eiben, Agoston E and Smith, James E},
  title     = {Introduction to evolutionary computing},
  volume    = {53},
  publisher = {Springer},
  year      = {2003},
  url       = {https://dx.doi.org/10.1007/978-3-662-44874-8},
}

@inproceedings{asirra-a-captcha-that-exploits-interest-aligned-manual-image-categorization,
  author    = {Elson, Jeremy and Douceur, John R. and Howell, Jon and Saul, Jared},
  title     = {Asirra: {A} {CAPTCHA} that Exploits Interest-Aligned Manual Image Categorization},
  booktitle = {ACM Conference on Computer and Communications Security (CCS)},
  number    = {14},
  publisher = {Association for Computing Machinery, Inc.},
  year      = {2007},
  month     = oct,
  url       = {https://www.microsoft.com/en-us/research/publication/asirra-a-captcha-that-exploits-interest-aligned-manual-image-categorization/},
  abstract  = {We present Asirra, a CAPTCHA that asks users to identify cats out of a set of 12 photographs of both cats and dogs. Asirra is easy for users; user studies indicate it can be solved by humans 99.6\% of the time in under 30 seconds. Barring a major advance in machine vision, we expect computers will have no better than a 1/54,000 chance of solving it. Asirra's image database is provided by a novel, mutually beneficial partnership with Petfinder.com. In exchange for the use of their three million images, we display an "adopt me" link beneath each one, promoting Petfinder's primary mission of finding homes for homeless animals. We describe the design of Asirra, discuss threats to its security, and report early deployment experiences. We also describe two novel algorithms for amplifying the skill gap between humans and computers that can be used on many existing CAPTCHAs.},
}

@inproceedings{ester1996density-dbscan,
  author    = {Ester, Martin and Kriegel, Hans-Peter and Sander, J{\"o}rg and Xu, Xiaowei},
  title     = {A density-based algorithm for discovering clusters in large spatial databases with noise},
  booktitle = {International Conference on Knowledge Discovery and Data Mining (KDD)},
  volume    = {96},
  number    = {34},
  pages     = {226--231},
  year      = {1996},
}

% Technical report, not a journal article (the entry never had a journal).
@techreport{quickprop,
  author      = {Fahlman, Scott E},
  title       = {An empirical study of learning speed in back-propagation networks},
  institution = {Carnegie Mellon University},
  number      = {CMU-CS-88-162},
  year        = {1988},
  url         = {http://repository.cmu.edu/cgi/viewcontent.cgi?article=2799&context=compsci},
}

% NOTE(review): typed as a report because the URL is the CMU report
% repository and the entry never had a journal -- confirm the intended version.
@techreport{fahlman1989cascade,
  author      = {Fahlman, Scott E and Lebiere, Christian},
  title       = {The cascade-correlation learning architecture},
  institution = {Carnegie Mellon University},
  year        = {1989},
  url         = {http://repository.cmu.edu/compsci/1938/},
}

@article{fei2006one,
  author    = {Fei-Fei, Li and Fergus, Rob and Perona, Pietro},
  title     = {One-shot learning of object categories},
  journal   = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
  volume    = {28},
  number    = {4},
  pages     = {594--611},
  publisher = {IEEE},
  year      = {2006},
  month     = apr,
  url       = {http://vision.stanford.edu/documents/Fei-FeiFergusPerona2006.pdf},
}

@misc{Caltech-101,
  author = {Fei-Fei, Li and Fergus, Rob and Perona, Pietro},
  title  = {Caltech 101},
  year   = {2003},
  url    = {http://www.vision.caltech.edu/Image_Datasets/Caltech101/Caltech101.html},
}

@article{felzenszwalb2010object,
  author    = {Felzenszwalb, Pedro F and Girshick, Ross B and McAllester, David and Ramanan, Deva},
  title     = {Object detection with discriminatively trained part-based models},
  journal   = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
  volume    = {32},
  number    = {9},
  pages     = {1627--1645},
  publisher = {IEEE},
  year      = {2010},
}

@article{gal2015bayesian,
  author  = {Gal, Yarin and Ghahramani, Zoubin},
  title   = {Bayesian convolutional neural networks with {Bernoulli} approximate variational inference},
  journal = {arXiv preprint arXiv:1506.02158},
  year    = {2016},
  month   = jan,
  url     = {https://arxiv.org/abs/1506.02158v6},
}

@book{garey2002computers,
  author    = {Garey, Michael R and Johnson, David S},
  title     = {Computers and intractability},
  volume    = {29},
  publisher = {W. H. Freeman},
  address   = {New York},
  year      = {2002},
}

@article{garey1976some,
  author    = {Garey, Michael R and Johnson, David S. and Stockmeyer, Larry},
  title     = {Some simplified {NP}-complete graph problems},
  journal   = {Theoretical Computer Science},
  volume    = {1},
  number    = {3},
  pages     = {237--267},
  publisher = {Elsevier},
  year      = {1976},
}

@inproceedings{girshick2014rich,
  author       = {Girshick, Ross and Donahue, Jeff and Darrell, Trevor and Malik, Jitendra},
  title        = {Rich feature hierarchies for accurate object detection and semantic segmentation},
  booktitle    = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  pages        = {580--587},
  organization = {IEEE},
  year         = {2014},
  url          = {https://arxiv.org/abs/1311.2524},
}

@inproceedings{glorot2010understanding,
  author    = {Glorot, Xavier and Bengio, Yoshua},
  title     = {Understanding the difficulty of training deep feedforward neural networks},
  booktitle = {International Conference on Artificial Intelligence and Statistics (AISTATS)},
  volume    = {9},
  pages     = {249--256},
  year      = {2010},
  url       = {http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf},
}

% Page range follows the JMLR W&CP volume 15 table of contents; the previous
% number/pages values were a scraping artefact.
@inproceedings{glorot2011deep,
  author    = {Glorot, Xavier and Bordes, Antoine and Bengio, Yoshua},
  title     = {Deep Sparse Rectifier Neural Networks},
  booktitle = {International Conference on Artificial Intelligence and Statistics (AISTATS)},
  volume    = {15},
  pages     = {315--323},
  year      = {2011},
  url       = {http://www.jmlr.org/proceedings/papers/v15/glorot11a/glorot11a.pdf},
}

@inproceedings{golle2008machine,
  author       = {Golle, Philippe},
  title        = {Machine learning attacks against the {Asirra} {CAPTCHA}},
  booktitle    = {ACM Conference on Computer and Communications Security (CCS)},
  number       = {15},
  pages        = {535--542},
  organization = {ACM},
  year         = {2008},
}

% Conference paper: the venue belongs in booktitle, not journal.
@inproceedings{goodfellow2013maxout,
  author    = {Goodfellow, Ian J and Warde-Farley, David and Mirza, Mehdi and Courville, Aaron C and Bengio, Yoshua},
  title     = {Maxout networks},
  booktitle = {International Conference on Machine Learning (ICML)},
  volume    = {28},
  number    = {3},
  pages     = {1319--1327},
  year      = {2013},
  url       = {http://www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf},
}

@article{graham2014fractional,
  author  = {Graham, Benjamin},
  title   = {Fractional max-pooling},
  journal = {arXiv preprint arXiv:1412.6071},
  year    = {2015},
  month   = may,
  url     = {https://arxiv.org/abs/1412.6071},
}

% Institutional report (no journal); the former publisher value is the institution.
@techreport{GregGriffin2007,
  author      = {Griffin, Greg and Holub, Alex and Perona, Pietro},
  title       = {Caltech-256 Object Category Dataset},
  institution = {California Institute of Technology},
  year        = {2007},
  month       = apr,
  url         = {http://authors.library.caltech.edu/7694/},
}

@misc{Griffin2006,
  author = {Griffin, G. and Holub, A. D. and Perona, P.},
  title  = {Caltech 256},
  year   = {2006},
  url    = {http://www.vision.caltech.edu/Image_Datasets/Caltech256/},
}

@book{han2011data,
  author    = {Han, Jiawei and Pei, Jian and Kamber, Micheline},
  title     = {Data mining: concepts and techniques},
  publisher = {Elsevier},
  year      = {2011},
}

@article{han2016dsd,
  author  = {Han, Song and Pool, Jeff and Narang, Sharan and Mao, Huizi and Tang, Shijian and Elsen, Erich and Catanzaro, Bryan and Tran, John and Dally, William J},
  title   = {{DSD}: {Regularizing} deep neural networks with dense-sparse-dense training flow},
  journal = {arXiv preprint arXiv:1607.04381},
  year    = {2016},
  month   = jul,
  url     = {https://arxiv.org/abs/1607.04381},
}

@incollection{han2015learning,
  author    = {Han, Song and Pool, Jeff and Tran, John and Dally, William},
  title     = {Learning both Weights and Connections for Efficient Neural Network},
  booktitle = {Advances in Neural Information Processing Systems 28 (NIPS)},
  editor    = {C. Cortes and N. D. Lawrence and D. D. Lee and M. Sugiyama and R. Garnett},
  pages     = {1135--1143},
  publisher = {Curran Associates, Inc.},
  year      = {2015},
  month     = jun,
  url       = {http://papers.nips.cc/paper/5784-learning-both-weights-and-connections-for-efficient-neural-network.pdf},
}

@inproceedings{hanson1989meiosis,
  author    = {Hanson, Stephen Jos{\'e}},
  title     = {Meiosis Networks},
  booktitle = {Advances in Neural Information Processing Systems 2 (NIPS)},
  pages     = {533--541},
  year      = {1989},
  url       = {http://papers.nips.cc/paper/227-meiosis-networks.pdf},
}

@article{hardt2016identity,
  author  = {Hardt, Moritz and Ma, Tengyu},
  title   = {Identity Matters in Deep Learning},
  journal = {arXiv preprint arXiv:1611.04231},
  year    = {2016},
  month   = nov,
  url     = {https://arxiv.org/abs/1611.04231},
}

@misc{Harris2015,
  author = {Mark Harris},
  title  = {New Features in {CUDA} 7.5},
  year   = {2015},
  month  = jul,
  url    = {https://devblogs.nvidia.com/parallelforall/new-features-cuda-7-5/},
}

@inproceedings{hassibi1993optimal,
  author       = {Hassibi, Babak and Stork, David G and Wolff, Gregory J},
  title        = {Optimal brain surgeon and general network pruning},
  booktitle    = {International Conference on Neural Networks},
  pages        = {293--299},
  organization = {IEEE},
  year         = {1993},
  url          = {http://ee.caltech.edu/Babak/pubs/conferences/00298572.pdf},
}

@article{deep-residual-networks-2015,
  author  = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title   = {Deep residual learning for image recognition},
  journal = {arXiv preprint arXiv:1512.03385},
  year    = {2015},
  month   = dec,
  url     = {https://arxiv.org/abs/1512.03385v1},
}

@inproceedings{he2015delving,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Delving deep into rectifiers: {Surpassing} human-level performance on {ImageNet} classification},
  booktitle = {International Conference on Computer Vision (ICCV)},
  pages     = {1026--1034},
  year      = {2015},
  month     = feb,
  url       = {https://arxiv.org/abs/1502.01852},
}

@inproceedings{he2014spatial,
  author       = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title        = {Spatial pyramid pooling in deep convolutional networks for visual recognition},
  booktitle    = {European Conference on Computer Vision (ECCV)},
  pages        = {346--361},
  organization = {Springer},
  year         = {2014},
  url          = {https://arxiv.org/abs/1406.4729},
}

@article{hinton2015distilling,
  author  = {Hinton, Geoffrey and Vinyals, Oriol and Dean, Jeff},
  title   = {Distilling the knowledge in a neural network},
  journal = {arXiv preprint arXiv:1503.02531},
  year    = {2015},
  month   = mar,
  url     = {https://arxiv.org/abs/1503.02531},
}

@article{hinton2012-dropout,
  author  = {Hinton, Geoffrey E and Srivastava, Nitish and Krizhevsky, Alex and Sutskever, Ilya and Salakhutdinov, Ruslan R},
  title   = {Improving neural networks by preventing co-adaptation of feature detectors},
  journal = {arXiv preprint arXiv:1207.0580},
  year    = {2012},
  month   = jul,
  url     = {https://arxiv.org/abs/1207.0580},
}

@article{howard2013some,
  author  = {Howard, Andrew G},
  title   = {Some improvements on deep convolutional neural network based image classification},
  journal = {arXiv preprint arXiv:1312.5402},
  year    = {2013},
  month   = dec,
  url     = {https://arxiv.org/abs/1312.5402},
}

@article{huang2016densely,
  author  = {Huang, Gao and Liu, Zhuang and Weinberger, Kilian Q},
  title   = {Densely connected convolutional networks},
  journal = {arXiv preprint arXiv:1608.06993},
  year    = {2016},
  month   = aug,
  url     = {https://arxiv.org/abs/1608.06993v1},
}

@article{huang2016deep,
  author  = {Huang, Gao and Sun, Yu and Liu, Zhuang and Sedra, Daniel and Weinberger, Kilian},
  title   = {Deep networks with stochastic depth},
  journal = {arXiv preprint arXiv:1603.09382},
  year    = {2016},
  month   = mar,
  url     = {https://arxiv.org/abs/1603.09382},
}

@article{huh2016makes,
  author  = {Huh, Minyoung and Agrawal, Pulkit and Efros, Alexei A},
  title   = {What makes {ImageNet} good for transfer learning?},
  journal = {arXiv preprint arXiv:1608.08614},
  year    = {2016},
  month   = aug,
  url     = {https://arxiv.org/abs/1608.08614},
}

@article{BatchNormalization-2015,
  author  = {Ioffe, Sergey and Szegedy, Christian},
  title   = {Batch normalization: {Accelerating} deep network training by reducing internal covariate shift},
  journal = {arXiv preprint arXiv:1502.03167},
  year    = {2015},
  month   = feb,
  url     = {https://arxiv.org/abs/1502.03167},
}

@inproceedings{jin2016deep,
  author    = {Jin, Xiaojie and Xu, Chunyan and Feng, Jiashi and Wei, Yunchao and Xiong, Junjun and Yan, Shuicheng},
  title     = {Deep learning with s-shaped rectified linear activation units},
  booktitle = {Thirtieth AAAI Conference on Artificial Intelligence},
  year      = {2016},
  month     = dec,
  url       = {https://arxiv.org/abs/1512.07030},
}

@misc{Karpathy2011,
  author = {Andrej Karpathy},
  title  = {Lessons learned from manually classifying {CIFAR-10}},
  year   = {2011},
  month  = apr,
  url    = {http://karpathy.github.io/2011/04/27/manually-classifying-cifar10/},
}

@book{kaufman2009finding-diana,
  author    = {Kaufman, Leonard and Rousseeuw, Peter J},
  title     = {Finding groups in data: an introduction to cluster analysis},
  publisher = {John Wiley \& Sons},
  year      = {2009},
  volume    = {344}
}

% All authors are given in the unambiguous "Last, First" form; written as
% "Yann L. Cun" the last author would be parsed with the surname "Cun".
@InCollection{kavukcuoglu2010learning,
  Title                    = {Learning Convolutional Feature Hierarchies for Visual Recognition},
  Author                   = {Kavukcuoglu, Koray and Sermanet, Pierre and Boureau, Y-Lan and Gregor, Karol and Mathieu, Micha{\"e}l and LeCun, Yann},
  Booktitle                = {Advances in Neural Information Processing Systems 23 (NIPS)},
  Publisher                = {Curran Associates, Inc.},
  Year                     = {2010},
  Editor                   = {J. D. Lafferty and C. K. I. Williams and J. Shawe-Taylor and R. S. Zemel and A. Culotta},
  Pages                    = {1090--1098},

  Url                      = {http://papers.nips.cc/paper/4133-learning-convolutional-feature-hierarchies-for-visual-recognition.pdf}
}

@article{keskar2016large,
  author  = {Keskar, Nitish Shirish and Mudigere, Dheevatsa and Nocedal, Jorge and Smelyanskiy, Mikhail and Tang, Ping Tak Peter},
  title   = {On large-batch training for deep learning: {Generalization} gap and sharp minima},
  journal = {arXiv preprint arXiv:1609.04836},
  year    = {2016},
  month   = sep,
  url     = {https://arxiv.org/abs/1609.04836}
}

@article{kim2015deep,
  author  = {Kim, Minyoung and Rigazio, Luca},
  title   = {Deep Clustered Convolutional Kernels},
  journal = {arXiv preprint arXiv:1503.01824},
  year    = {2015},
  month   = mar,
  url     = {https://arxiv.org/abs/1503.01824}
}

@article{kim2015compression,
  author  = {Kim, Yong-Deok and Park, Eunhyeok and Yoo, Sungjoo and Choi, Taelim and Yang, Lu and Shin, Dongjun},
  title   = {Compression of deep convolutional neural networks for fast and low power mobile applications},
  journal = {arXiv preprint arXiv:1511.06530},
  year    = {2015},
  month   = nov,
  url     = {https://arxiv.org/abs/1511.06530}
}

@article{kingma2014adam,
  author  = {Kingma, Diederik and Ba, Jimmy},
  title   = {Adam: A method for stochastic optimization},
  journal = {arXiv preprint arXiv:1412.6980},
  year    = {2014},
  month   = dec,
  url     = {https://arxiv.org/abs/1412.6980}
}

% A master's thesis: the entry type supplies the "Master's thesis" label,
% so School holds only the institution name.
@MastersThesis{kocmanek2015hyperneat,
  Title                    = {{HyperNEAT} and Novelty Search for Image Recognition},
  Author                   = {Kocm{\'a}nek, T},
  School                   = {Czech Technical University in Prague},
  Year                     = {2015},

  Url                      = {http://kocmi.tk/photos/DiplomaThesis.pdf}
}

@misc{CIFAR-10,
  author = {Alex Krizhevsky},
  title  = {The {CIFAR-10} dataset},
  url    = {https://www.cs.toronto.edu/~kriz/cifar.html}
}

% A technical report (see the "-TR" URL), not a journal article: an Article
% entry without a Journal field is incomplete, so TechReport + Institution
% is used. The former "Publisher = Citeseer" was an indexing-site artefact.
@TechReport{krizhevsky2009learning,
  Title                    = {Learning multiple layers of features from tiny images},
  Author                   = {Krizhevsky, Alex and Hinton, Geoffrey},
  Institution              = {University of Toronto},
  Year                     = {2009},

  Month                    = apr,

  File                     = {:home/moose/GitHub/msthesis/paper/10.1.1.222.9220.pdf:PDF},
  Url                      = {https://www.cs.toronto.edu/~kriz/learning-features-2009-TR.pdf}
}

% {ImageNet} is brace-protected so sentence-casing styles keep its capitals.
@InCollection{AlexNet-2012,
  Title                    = {{ImageNet} Classification with Deep Convolutional Neural Networks},
  Author                   = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E},
  Booktitle                = {Advances in Neural Information Processing Systems 25 (NIPS)},
  Publisher                = {Curran Associates, Inc.},
  Year                     = {2012},
  Editor                   = {F. Pereira and C. J. C. Burges and L. Bottou and K. Q. Weinberger},
  Pages                    = {1097--1105},

  Url                      = {http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf}
}

@Article{971754,
  Title                    = {Comparison of worst case errors in linear and neural network approximation},
  Author                   = {V. Kurkova and M. Sanguineti},
  Journal                  = {IEEE Transactions on Information Theory},
  Year                     = {2002},

  Month                    = jan,
  Number                   = {1},
  Pages                    = {264--275},
  Volume                   = {48},

  Doi                      = {10.1109/18.971754},
  ISSN                     = {0018-9448},
  Keywords                 = {approximation theory;error analysis;feedforward neural nets;function approximation;perceptrons;set theory;fixed basis functions;linear approximation;multivariable functions;multivariable optimization;neural network approximation;one-hidden-layer feedforward neural networks;perceptron networks;sets;variable basis functions;worst case errors;Adaptive control;Chromium;Computer aided software engineering;Computer science;Electrical equipment industry;Industrial control;Intelligent networks;Joining materials;Linear approximation;Neural networks},
  Url                      = {http://ieeexplore.ieee.org/abstract/document/971754/}
}

% Classic BibTeX styles ignore Booktitle in an InBook entry and would print
% the chapter title as the book title; InCollection prints both.
@InCollection{vanLaarhoven1987,
  Title                    = {Simulated annealing},
  Author                   = {van Laarhoven, Peter J. M. and Aarts, Emile H. L.},
  Booktitle                = {Simulated Annealing: {Theory} and Applications},
  Pages                    = {7--15},
  Publisher                = {Springer Netherlands},
  Year                     = {1987},

  Address                  = {Dordrecht},

  Doi                      = {10.1007/978-94-015-7744-1_2},
  ISBN                     = {978-94-015-7744-1},
  Url                      = {http://dx.doi.org/10.1007/978-94-015-7744-1_2}
}

% Month follows the CVPR 2016 conference (June) rather than the
% September 2015 arXiv upload, so that it agrees with Year and Booktitle.
@InProceedings{lavin2016fast,
  Title                    = {Fast algorithms for convolutional neural networks},
  Author                   = {Lavin, Andrew and Gray, Scott},
  Booktitle                = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  Year                     = {2016},
  Month                    = jun,
  Organization             = {IEEE},
  Pages                    = {4013--4021},

  Url                      = {https://arxiv.org/abs/1509.09308}
}

@InProceedings{le2013building,
  Title                    = {Building high-level features using large scale unsupervised learning},
  Author                   = {Le, Quoc V},
  Booktitle                = {International Conference on Acoustics, Speech and Signal Processing},
  Year                     = {2013},
  Organization             = {IEEE},
  Pages                    = {8595--8598},

  Url                      = {http://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=6639343}
}

% A chapter by its own authors inside an edited volume: classic BibTeX
% rejects Author together with Editor in an InBook entry, so InCollection
% is used. {BackProp} is protected against sentence-casing.
@InCollection{EfficientBackprop,
  Title                    = {Efficient {BackProp}},
  Author                   = {LeCun, Yann A. and Bottou, L{\'e}on and Orr, Genevieve B. and M{\"u}ller, Klaus-Robert},
  Booktitle                = {Neural Networks: {Tricks} of the Trade: Second Edition},
  Editor                   = {Montavon, Gr{\'e}goire and Orr, Genevieve B. and M{\"u}ller, Klaus-Robert},
  Pages                    = {9--50},
  Publisher                = {Springer Berlin Heidelberg},
  Year                     = {1998},

  Address                  = {Berlin, Heidelberg},
  Series                   = {Lecture Notes in Computer Science},
  Volume                   = {1524},

  ISBN                     = {978-3-642-35289-8},
  Url                      = {http://dx.doi.org/10.1007/3-540-49430-8}
}

@article{lecun2015deep,
  author    = {LeCun, Yann and Bengio, Yoshua and Hinton, Geoffrey},
  title     = {Deep learning},
  journal   = {Nature},
  volume    = {521},
  number    = {7553},
  pages     = {436--444},
  year      = {2015},
  month     = may,
  publisher = {Nature Publishing Group},
  url       = {http://www.nature.com/nature/journal/v521/n7553/abs/nature14539.html}
}

@Article{LeNet-5,
  Title                    = {Gradient-based learning applied to document recognition},
  Author                   = {LeCun, Yann and Bottou, L{\'e}on and Bengio, Yoshua and Haffner, Patrick},
  Journal                  = {Proceedings of the IEEE},
  Year                     = {1998},

  Month                    = nov,
  Number                   = {11},
  Pages                    = {2278--2324},
  Volume                   = {86},

  Doi                      = {10.1109/5.726791},
  ISSN                     = {0018-9219},
  Keywords                 = {backpropagation;convolution;multilayer perceptrons;optical character recognition;2D shape variability;GTN;back-propagation;cheque reading;complex decision surface synthesis;convolutional neural network character recognizers;document recognition;document recognition systems;field extraction;gradient based learning technique;gradient-based learning;graph transformer networks;handwritten character recognition;handwritten digit recognition task;high-dimensional patterns;language modeling;multilayer neural networks;multimodule systems;performance measure minimization;segmentation recognition;Character recognition;Feature extraction;Hidden Markov models;Machine learning;Multi-layer neural network;Neural networks;Optical character recognition software;Optical computing;Pattern recognition;Principal component analysis},
  Url                      = {http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf}
}

% The title carries no trailing period (styles add their own punctuation),
% and the venue is spelled out like the other NIPS entries in this file.
@InProceedings{lecun1989optimal,
  Title                    = {Optimal brain damage},
  Author                   = {LeCun, Yann and Denker, John S and Solla, Sara A and Howard, Richard E and Jackel, Lawrence D},
  Booktitle                = {Advances in Neural Information Processing Systems (NIPS)},
  Year                     = {1989},
  Pages                    = {598--605},
  Volume                   = {2},

  Url                      = {http://yann.lecun.com/exdb/publis/pdf/lecun-90b.pdf}
}

@inproceedings{lee2016generalizing,
  author    = {Lee, Chen-Yu and Gallagher, Patrick W and Tu, Zhuowen},
  title     = {Generalizing pooling functions in convolutional neural networks: {Mixed}, gated, and tree},
  booktitle = {International Conference on Artificial Intelligence and Statistics},
  year      = {2016},
  url       = {https://arxiv.org/abs/1509.08985v2}
}

@article{li2016learning,
  author  = {Li, Ke and Malik, Jitendra},
  title   = {Learning to optimize},
  journal = {arXiv preprint arXiv:1606.01885},
  year    = {2016},
  month   = jun,
  url     = {https://arxiv.org/abs/1606.01885}
}

@article{li2016hyperband,
  author  = {Li, Lisha and Jamieson, Kevin and DeSalvo, Giulia and Rostamizadeh, Afshin and Talwalkar, Ameet},
  title   = {Hyperband: {A} Novel Bandit-Based Approach to Hyperparameter Optimization},
  journal = {arXiv preprint arXiv:1603.06560},
  year    = {2016},
  month   = mar,
  url     = {https://arxiv.org/abs/1603.06560}
}

% Authors must be separated by " and "; with a comma the two names were
% parsed as a single person (surname "Lingxi Xie", given name "Alan Yuille").
@Article{LingxiXie2017,
  Title                    = {Genetic {CNN}},
  Author                   = {Xie, Lingxi and Yuille, Alan},
  Journal                  = {arXiv preprint arXiv:1703.01513},
  Year                     = {2017},

  Month                    = mar,

  Url                      = {https://arxiv.org/abs/1703.01513}
}

@inproceedings{liu2016ssd,
  author       = {Liu, Wei and Anguelov, Dragomir and Erhan, Dumitru and Szegedy, Christian and Reed, Scott and Fu, Cheng-Yang and Berg, Alexander C},
  title        = {{SSD}: {Single} shot multibox detector},
  booktitle    = {European Conference on Computer Vision (ECCV)},
  organization = {Springer},
  pages        = {21--37},
  year         = {2016},
  url          = {https://arxiv.org/abs/1512.02325}
}

% Month follows the CVPR 2015 conference (June) rather than the
% March 2015 arXiv revision, so that it agrees with Booktitle.
@InProceedings{long2015fully,
  Title                    = {Fully convolutional networks for semantic segmentation},
  Author                   = {Long, Jonathan and Shelhamer, Evan and Darrell, Trevor},
  Booktitle                = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  Year                     = {2015},
  Month                    = jun,
  Organization             = {IEEE},
  Pages                    = {3431--3440},

  Url                      = {https://arxiv.org/abs/1411.4038v2}
}

% The DBLP "Volume = abs/..." field is dropped: the arXiv identifier is
% already in Journal and would otherwise be printed twice ("vol. abs/...").
% The colon sits outside the protecting braces so the word after it keeps
% its capital under sentence-casing styles.
@Article{DBLP:journals/corr/LoshchilovH16a,
  Title                    = {{SGDR}: {Stochastic} Gradient Descent with Restarts},
  Author                   = {Ilya Loshchilov and Frank Hutter},
  Journal                  = {arXiv preprint arXiv:1608.03983},
  Year                     = {2016},

  Month                    = aug,

  Bibsource                = {dblp computer science bibliography, http://dblp.org},
  Biburl                   = {http://dblp.uni-trier.de/rec/bib/journals/corr/LoshchilovH16a},
  Timestamp                = {Fri, 02 Sep 2016 17:46:24 +0200},
  Url                      = {https://arxiv.org/abs/1608.03983}
}

% "Learning" is not a journal (an export artefact); the work is the arXiv
% preprint given in Url, so Journal follows this file's arXiv convention.
@Article{loshchilov10sgdr,
  Title                    = {{SGDR}: {Stochastic} Gradient Descent with Warm Restarts},
  Author                   = {Loshchilov, Ilya and Hutter, Frank},
  Journal                  = {arXiv preprint arXiv:1608.03983},
  Year                     = {2016},

  Month                    = aug,

  Url                      = {https://arxiv.org/abs/1608.03983}
}

% Venue spelled out to match the other ICML entry in this file.
% NOTE(review): Volume/Number look like export artefacts - confirm before relying on them.
@InProceedings{maas2013rectifier,
  Title                    = {Rectifier nonlinearities improve neural network acoustic models},
  Author                   = {Maas, Andrew L and Hannun, Awni Y and Ng, Andrew Y},
  Booktitle                = {International Conference on Machine Learning (ICML)},
  Year                     = {2013},
  Number                   = {1},
  Volume                   = {30},

  Url                      = {https://web.stanford.edu/~awni/papers/relu_hybrid_icml2013_final.pdf}
}

% The "van der" particle belongs in front of the surname ("von Last, First");
% placed after the given name it was abbreviated as if it were a first name.
% "Nov" was stored in Number but is the month of the issue.
@Article{maaten2008visualizing,
  Title                    = {Visualizing data using {t-SNE}},
  Author                   = {van der Maaten, Laurens and Hinton, Geoffrey},
  Journal                  = {Journal of Machine Learning Research},
  Year                     = {2008},
  Month                    = nov,
  Pages                    = {2579--2605},
  Volume                   = {9}
}

@inproceedings{maclaurin2015gradient,
  author    = {Maclaurin, Dougal and Duvenaud, David and Adams, Ryan},
  title     = {Gradient-based hyperparameter optimization through reversible learning},
  booktitle = {International Conference on Machine Learning (ICML)},
  pages     = {2113--2122},
  year      = {2015}
}

@article{mahendran2016visualizing,
  author    = {Mahendran, Aravindh and Vedaldi, Andrea},
  title     = {Visualizing deep convolutional neural networks using natural pre-images},
  journal   = {International Journal of Computer Vision},
  pages     = {1--23},
  year      = {2016},
  month     = apr,
  publisher = {Springer},
  url       = {https://arxiv.org/abs/1512.02017}
}

@misc{Majumdar2017-densenet-weights,
  author       = {Somshubra Majumdar},
  title        = {DenseNet},
  howpublished = {GitHub},
  year         = {2017},
  month        = feb,
  url          = {https://github.com/titu1994/DenseNet}
}

@inproceedings{marszalek2007accurate,
  author       = {Marszalek, Marcin and Schmid, Cordelia},
  title        = {Accurate object localization with shape masks},
  booktitle    = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  organization = {IEEE},
  pages        = {1--8},
  year         = {2007},
  url          = {http://ieeexplore.ieee.org/document/4270110/}
}

% The Polish l-stroke is written as the LaTeX escape {\l}, matching the
% escaped accents used elsewhere in this file and safe for 8-bit BibTeX.
@Misc{IG02-dataset,
  Title                    = {{INRIA} Annotations for {Graz-02} ({IG02})},

  Author                   = {Marsza{\l}ek, Marcin},
  Month                    = oct,
  Year                     = {2008},

  Url                      = {http://lear.inrialpes.fr/people/marszalek/data/ig02/}
}

% Journal names are printed verbatim by most styles, so the name is stored
% with its proper capitalisation.
@Article{mcculloch1943logical,
  Title                    = {A logical calculus of the ideas immanent in nervous activity},
  Author                   = {McCulloch, Warren S and Pitts, Walter},
  Journal                  = {The Bulletin of Mathematical Biophysics},
  Year                     = {1943},
  Number                   = {4},
  Pages                    = {115--133},
  Volume                   = {5},

  Publisher                = {Springer}
}

% "Del Rincon" is written in "Last, First" form so the capitalised particle
% stays part of the surname instead of being abbreviated as a given name.
@InProceedings{7301739,
  Title                    = {Data-augmentation for reducing dataset bias in person re-identification},
  Author                   = {McLaughlin, N. and Del Rincon, J. M. and Miller, P.},
  Booktitle                = {International Conference on Advanced Video and Signal Based Surveillance (AVSS)},
  Year                     = {2015},
  Month                    = aug,
  Number                   = {12},
  Pages                    = {1--6},

  Doi                      = {10.1109/AVSS.2015.7301739},
  Keywords                 = {image processing;convolutional network based reidentification system;cross-dataset generalisation;data augmentation;dataset bias reduction;image background;person reidentification;Accuracy;Cameras;Image color analysis;Lighting;Standards;Testing;Training},
  Url                      = {http://ieeexplore.ieee.org/abstract/document/7301739/}
}

@article{mishkin2015all,
  author  = {Mishkin, Dmytro and Matas, Jiri},
  title   = {All you need is a good init},
  journal = {arXiv preprint arXiv:1511.06422},
  year    = {2015},
  month   = nov,
  url     = {https://arxiv.org/abs/1511.06422}
}

@article{mishkin2016systematic,
  author  = {Mishkin, Dmytro and Sergievskiy, Nikolay and Matas, Jiri},
  title   = {Systematic evaluation of {CNN} advances on the {ImageNet}},
  journal = {arXiv preprint arXiv:1606.02228},
  year    = {2016},
  month   = jun,
  url     = {https://arxiv.org/abs/1606.02228}
}

@incollection{natarajan2013learning,
  author    = {Natarajan, Nagarajan and Dhillon, Inderjit S and Ravikumar, Pradeep K and Tewari, Ambuj},
  title     = {Learning with Noisy Labels},
  booktitle = {Advances in Neural Information Processing Systems 26 (NIPS)},
  editor    = {C. J. C. Burges and L. Bottou and M. Welling and Z. Ghahramani and K. Q. Weinberger},
  publisher = {Curran Associates, Inc.},
  pages     = {1196--1204},
  year      = {2013},
  url       = {http://papers.nips.cc/paper/5073-learning-with-noisy-labels.pdf}
}

% Soviet Mathematics Doklady is a journal (volume/number/pages), so this is
% an Article. The complexity bound is typeset as protected math so that
% sentence-casing styles do not lowercase the "O".
@Article{nesterov1983method,
  Title                    = {A method of solving a convex programming problem with convergence rate {$O(1/k^2)$}},
  Author                   = {Nesterov, Yurii},
  Journal                  = {Soviet Mathematics Doklady},
  Year                     = {1983},
  Number                   = {2},
  Pages                    = {372--376},
  Volume                   = {27}
}

% Venue name capitalised like the other booktitles in this file.
% NOTE(review): Number/Pages/Volume look like export artefacts - confirm.
@InProceedings{netzer2011reading,
  Title                    = {Reading digits in natural images with unsupervised feature learning},
  Author                   = {Netzer, Yuval and Wang, Tao and Coates, Adam and Bissacco, Alessandro and Wu, Bo and Ng, Andrew Y},
  Booktitle                = {{NIPS} Workshop on Deep Learning and Unsupervised Feature Learning},
  Year                     = {2011},
  Number                   = {2},
  Pages                    = {5},
  Volume                   = {2011},

  Url                      = {http://ufldl.stanford.edu/housenumbers/nips2011_housenumbers.pdf}
}

% Names use the lowercase " and " separator and "Last, First" form; the
% upper-case "AND" is not reliably recognised by every BibTeX tool.
@Misc{YuvalNetzer2011,
  Title                    = {The Street View House Numbers ({SVHN}) Dataset},

  Author                   = {Netzer, Yuval and Wang, Tao and Coates, Adam and Bissacco, Alessandro and Wu, Bo and Ng, Andrew Y.},
  Year                     = {2011},

  Url                      = {http://ufldl.stanford.edu/housenumbers/}
}

% {AI} is brace-protected so sentence-casing styles do not print "ai".
@Misc{Ng2016,
  Title                    = {Nuts and bolts of building {AI} applications using Deep Learning},

  Author                   = {Andrew Ng},
  HowPublished             = {NIPS Talk},
  Month                    = dec,
  Year                     = {2016}
}

% Journal name capitalised like the other IEEE Transactions entry in this file.
@Article{ng2002clarans,
  Title                    = {{CLARANS}: {A} method for clustering objects for spatial data mining},
  Author                   = {Ng, Raymond T. and Han, Jiawei},
  Journal                  = {IEEE Transactions on Knowledge and Data Engineering},
  Year                     = {2002},
  Number                   = {5},
  Pages                    = {1003--1016},
  Volume                   = {14},

  Publisher                = {IEEE}
}

@article{nguyen2016multifaceted,
  author  = {Nguyen, Anh and Yosinski, Jason and Clune, Jeff},
  title   = {Multifaceted Feature Visualization: {Uncovering} the Different Types of Features Learned By Each Neuron in Deep Neural Networks},
  journal = {arXiv preprint arXiv:1602.03616},
  year    = {2016},
  month   = may,
  url     = {https://arxiv.org/abs/1602.03616}
}

% Month follows the CVPR 2015 conference (June) rather than the
% December 2014 arXiv upload, so that it agrees with Year and Booktitle.
@InProceedings{nguyen2015deep,
  Title                    = {Deep neural networks are easily fooled: {High} confidence predictions for unrecognizable images},
  Author                   = {Nguyen, Anh and Yosinski, Jason and Clune, Jeff},
  Booktitle                = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  Year                     = {2015},
  Month                    = jun,
  Organization             = {IEEE},
  Pages                    = {427--436},

  Url                      = {https://arxiv.org/abs/1412.1897v4}
}

% Journal name stored with its proper capitalisation (printed verbatim).
@Article{nowlan1992simplifying,
  Title                    = {Simplifying neural networks by soft weight-sharing},
  Author                   = {Nowlan, Steven J and Hinton, Geoffrey E},
  Journal                  = {Neural Computation},
  Year                     = {1992},
  Number                   = {4},
  Pages                    = {473--493},
  Volume                   = {4},

  Publisher                = {MIT Press},
  Url                      = {https://www.cs.toronto.edu/~hinton/absps/sunspots.pdf}
}

@article{ortigosa2016towards,
  author  = {Ortigosa-Hern{\'a}ndez, Jonathan and Inza, I{\~n}aki and Lozano, Jose A},
  title   = {Towards Competitive Classifiers for Unbalanced Classification Problems: {A} Study on the Performance Scores},
  journal = {arXiv preprint arXiv:1608.08984},
  year    = {2016},
  month   = aug,
  url     = {https://arxiv.org/abs/1608.08984}
}

@article{papernot2015distillation,
  author  = {Papernot, Nicolas and McDaniel, Patrick and Wu, Xi and Jha, Somesh and Swami, Ananthram},
  title   = {Distillation as a defense to adversarial perturbations against deep neural networks},
  journal = {arXiv preprint arXiv:1511.04508},
  year    = {2015},
  month   = nov,
  url     = {https://arxiv.org/abs/1511.04508}
}

% A chapter by its own author inside an edited volume: classic BibTeX
% rejects Author together with Editor in an InBook entry and ignores its
% Booktitle, so InCollection is used. The dash is written as "--".
@InCollection{Prechelt1998,
  Title                    = {Early Stopping -- But When?},
  Author                   = {Prechelt, Lutz},
  Booktitle                = {Neural Networks: {Tricks} of the Trade},
  Editor                   = {Orr, Genevieve B. and M{\"u}ller, Klaus-Robert},
  Pages                    = {55--69},
  Publisher                = {Springer Berlin Heidelberg},
  Year                     = {1998},

  Address                  = {Berlin, Heidelberg},

  Doi                      = {10.1007/3-540-49430-8_3},
  ISBN                     = {978-3-540-49430-0},
  Url                      = {http://dx.doi.org/10.1007/3-540-49430-8_3}
}

% LaTeX opening/closing quotes replace the straight double quotes (which
% typeset as two closing quotes); "Why" and "I" are brace-protected because
% the leading quote mark stops styles from preserving the first capital.
@Article{ribeiro2016should,
  Title                    = {{``Why} Should {I} Trust You?'': {Explaining} the Predictions of Any Classifier},
  Author                   = {Ribeiro, Marco Tulio and Singh, Sameer and Guestrin, Carlos},
  Journal                  = {arXiv preprint arXiv:1602.04938},
  Year                     = {2016},

  Month                    = feb,

  Url                      = {https://arxiv.org/abs/1602.04938}
}

% {HyperNEAT} is protected and spelled as in the other HyperNEAT entry of
% this file; the venue name is capitalised consistently.
@InProceedings{risi2010evolving,
  Title                    = {Evolving the placement and density of neurons in the {HyperNEAT} substrate},
  Author                   = {Risi, Sebastian and Lehman, Joel and Stanley, Kenneth O},
  Booktitle                = {Conference on Genetic and Evolutionary Computation},
  Year                     = {2010},
  Number                   = {12},
  Organization             = {ACM},
  Pages                    = {563--570}
}

% {U-Net} is brace-protected with the capitalisation of the architecture name.
@InProceedings{ronneberger2015u,
  Title                    = {{U-Net}: {Convolutional} networks for biomedical image segmentation},
  Author                   = {Ronneberger, Olaf and Fischer, Philipp and Brox, Thomas},
  Booktitle                = {International Conference on Medical Image Computing and Computer-Assisted Intervention},
  Year                     = {2015},
  Organization             = {Springer},
  Pages                    = {234--241},

  Url                      = {https://arxiv.org/abs/1505.04597}
}

@article{ruder2016overview,
  author  = {Ruder, Sebastian},
  title   = {An overview of gradient descent optimization algorithms},
  journal = {arXiv preprint arXiv:1609.04747},
  year    = {2016},
  month   = sep,
  url     = {https://arxiv.org/abs/1609.04747}
}

% Volume 115, number 3, pages 211--252 and publisher Springer describe the
% journal version, so Journal/Year/Month now name that publication instead
% of pairing those numbers with the arXiv identifier. The arXiv link stays
% in Url. NOTE(review): confirm the issue date (Dec. 2015) against the publisher page.
@Article{russakovsky2014imagenet,
  Title                    = {{ImageNet} large scale visual recognition challenge},
  Author                   = {Russakovsky, Olga and Deng, Jia and Su, Hao and Krause, Jonathan and Satheesh, Sanjeev and Ma, Sean and Huang, Zhiheng and Karpathy, Andrej and Khosla, Aditya and Bernstein, Michael and Berg, Alexander C. and Fei-Fei, Li},
  Journal                  = {International Journal of Computer Vision},
  Year                     = {2015},

  Month                    = dec,
  Number                   = {3},
  Pages                    = {211--252},
  Volume                   = {115},

  Publisher                = {Springer},
  Url                      = {https://arxiv.org/abs/1409.0575}
}

@article{saxe2013exact,
  author  = {Saxe, Andrew M and McClelland, James L and Ganguli, Surya},
  title   = {Exact solutions to the nonlinear dynamics of learning in deep linear neural networks},
  journal = {arXiv preprint arXiv:1312.6120},
  year    = {2013},
  month   = dec,
  url     = {https://arxiv.org/abs/1312.6120}
}

@article{saxena2016convolutional,
  author  = {Saxena, Shreyas and Verbeek, Jakob},
  title   = {Convolutional Neural Fabrics},
  journal = {arXiv preprint arXiv:1606.02492},
  year    = {2016},
  url     = {https://arxiv.org/abs/1606.02492}
}

% Month follows the CVPR 2015 conference (June) rather than the March 2015
% arXiv upload; {FaceNet} is protected with its proper capitalisation.
@InProceedings{schroff2015facenet,
  Title                    = {{FaceNet}: {A} unified embedding for face recognition and clustering},
  Author                   = {Schroff, Florian and Kalenichenko, Dmitry and Philbin, James},
  Booktitle                = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  Year                     = {2015},
  Month                    = jun,
  Organization             = {IEEE},
  Pages                    = {815--823},

  Url                      = {https://arxiv.org/abs/1503.03832}
}

@inproceedings{senior2013empirical,
  author       = {Senior, Andrew and Heigold, Georg and Yang, Ke and others},
  title        = {An empirical study of learning rates in deep neural networks for speech recognition},
  booktitle    = {International Conference on Acoustics, Speech and Signal Processing},
  organization = {IEEE},
  pages        = {6724--6728},
  year         = {2013},
  url          = {http://ieeexplore.ieee.org/document/6638963/?arnumber=6638963}
}

% Month follows the ICPR 2012 conference (November) rather than the
% April 2012 arXiv upload, so that it agrees with Booktitle.
@InProceedings{sermanet2012convolutional,
  Title                    = {Convolutional neural networks applied to house numbers digit classification},
  Author                   = {Sermanet, Pierre and Chintala, Soumith and LeCun, Yann},
  Booktitle                = {International Conference on Pattern Recognition (ICPR)},
  Year                     = {2012},
  Month                    = nov,
  Number                   = {21},
  Organization             = {IEEE},
  Pages                    = {3288--3291},

  Url                      = {https://arxiv.org/abs/1204.3968}
}

% Month agrees with Year and with the arXiv identifier 1312 (December 2013);
% "feb" was the date of the later v4 revision (2014) linked in Url.
% {OverFeat} is protected with its proper capitalisation.
@Article{sermanet2013overfeat,
  Title                    = {{OverFeat}: {Integrated} recognition, localization and detection using convolutional networks},
  Author                   = {Sermanet, Pierre and Eigen, David and Zhang, Xiang and Mathieu, Micha{\"e}l and Fergus, Rob and LeCun, Yann},
  Journal                  = {arXiv preprint arXiv:1312.6229},
  Year                     = {2013},

  Month                    = dec,

  Url                      = {https://arxiv.org/abs/1312.6229v4}
}

@InProceedings{6033589,
  Title                    = {Traffic sign recognition with multi-scale Convolutional Networks},
  Author                   = {P. Sermanet and Y. LeCun},
  Booktitle                = {International Joint Conference on Neural Networks (IJCNN)},
  Year                     = {2011},
  Month                    = jul,
  Pages                    = {2809--2813},

  Doi                      = {10.1109/IJCNN.2011.6033589},
  ISSN                     = {2161-4393},
  Keywords                 = {computer vision;image classification;image colour analysis;traffic engineering computing;GTSRB competition;HOG;SIFT;greyscale images;hand-crafted features;hierarchy learning;multiscale convolutional network;multistage architecture;traffic sign classification;traffic sign recognition;vision approach;Accuracy;Color;Computer architecture;Feature extraction;Image color analysis;Neural networks;Training},
  Url                      = {http://ieeexplore.ieee.org/document/6033589/}
}

% Volume 35, issue 5 is the May 2016 issue; journal names are not re-cased by styles, so store them capitalised.
@Article{shin2016deep,
  Title                    = {Deep convolutional neural networks for computer-aided detection: {CNN} architectures, dataset characteristics and transfer learning},
  Author                   = {Shin, Hoo-Chang and Roth, Holger R and Gao, Mingchen and Lu, Le and Xu, Ziyue and Nogues, Isabella and Yao, Jianhua and Mollura, Daniel and Summers, Ronald M},
  Journal                  = {IEEE Transactions on Medical Imaging},
  Year                     = {2016},

  Month                    = may,
  Number                   = {5},
  Pages                    = {1285--1298},
  Volume                   = {35},

  Publisher                = {IEEE},
  Url                      = {http://ieeexplore.ieee.org/document/7404017/?arnumber=7404017}
}

@Article{simonyan2013deep,
  Author                   = {Simonyan, Karen and Vedaldi, Andrea and Zisserman, Andrew},
  Title                    = {Deep inside convolutional networks: {Visualising} image classification models and saliency maps},
  Journal                  = {arXiv preprint arXiv:1312.6034},
  Year                     = {2013},
  Month                    = dec,
  Url                      = {https://arxiv.org/abs/1312.6034}
}

@Article{VGG-16,
  Author                   = {Simonyan, Karen and Zisserman, Andrew},
  Title                    = {Very deep convolutional networks for large-scale image recognition},
  Journal                  = {arXiv preprint arXiv:1409.1556},
  Year                     = {2014},
  Month                    = sep,
  Url                      = {https://arxiv.org/abs/1409.1556}
}

% Titles carry no trailing period; the bibliography style supplies the punctuation.
@Article{srivastava2014dropout,
  Title                    = {Dropout: {A} simple way to prevent neural networks from overfitting},
  Author                   = {Srivastava, Nitish and Hinton, Geoffrey E and Krizhevsky, Alex and Sutskever, Ilya and Salakhutdinov, Ruslan},
  Journal                  = {Journal of Machine Learning Research},
  Year                     = {2014},
  Number                   = {1},
  Pages                    = {1929--1958},
  Volume                   = {15},

  Url                      = {https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf}
}

@Article{srivastava2014understanding,
  Author                   = {Srivastava, Rupesh Kumar and Masci, Jonathan and Gomez, Faustino and Schmidhuber, J{\"u}rgen},
  Title                    = {Understanding locally competitive networks},
  Journal                  = {arXiv preprint arXiv:1410.1165},
  Year                     = {2014},
  Month                    = oct,
  Url                      = {https://arxiv.org/abs/1410.1165}
}

% Names are separated by a lowercase "and"; not every BibTeX-compatible tool accepts the upper-case form.
@Misc{JohannesStallkamp,
  Title                    = {The {German} Traffic Sign Recognition Benchmark},

  Author                   = {Stallkamp, Johannes and Schlipsing, Marc and Salmen, Jan and Igel, Christian},

  Url                      = {http://benchmark.ini.rub.de/?section=gtsrb&subsection=news}
}

% Final publication data (Neural Networks, vol. 32, pp. 323-332) replaces the in-press placeholders of the original export.
@Article{Stallkamp2012GTSRB,
  Title                    = {Man vs. computer: {Benchmarking} machine learning algorithms for traffic sign recognition},
  Author                   = {Stallkamp, Johannes and Schlipsing, Marc and Salmen, Jan and Igel, Christian},
  Journal                  = {Neural Networks},
  Year                     = {2012},
  Pages                    = {323--332},
  Volume                   = {32},

  Doi                      = {10.1016/j.neunet.2012.02.016},
  ISSN                     = {0893-6080},
  Keywords                 = {Traffic sign recognition},
  Url                      = {http://www.sciencedirect.com/science/article/pii/S0893608012000457}
}

% The proper noun in the title is braced so sentence-casing styles do not lowercase it.
@InProceedings{stallkamp2011german,
  Title                    = {The {German} traffic sign recognition benchmark: {A} multi-class classification competition},
  Author                   = {Stallkamp, Johannes and Schlipsing, Marc and Salmen, Jan and Igel, Christian},
  Booktitle                = {International Joint Conference on Neural Networks (IJCNN)},
  Year                     = {2011},
  Organization             = {IEEE},
  Pages                    = {1453--1460},

  Url                      = {http://ieeexplore.ieee.org/document/6033395/}
}

% Journal names are printed verbatim by the style, so they are stored with their proper capitalisation.
@Article{stanley2009hypercube,
  Title                    = {A hypercube-based encoding for evolving large-scale neural networks},
  Author                   = {Stanley, Kenneth O and D'Ambrosio, David B and Gauci, Jason},
  Journal                  = {Artificial Life},
  Year                     = {2009},
  Number                   = {2},
  Pages                    = {185--212},
  Volume                   = {15},

  Publisher                = {MIT Press},
  Url                      = {http://ieeexplore.ieee.org/document/6792316/}
}

% Journal names are printed verbatim by the style, so they are stored with their proper capitalisation.
@Article{stanley2002evolving,
  Title                    = {Evolving neural networks through augmenting topologies},
  Author                   = {Stanley, Kenneth O and Miikkulainen, Risto},
  Journal                  = {Evolutionary Computation},
  Year                     = {2002},
  Number                   = {2},
  Pages                    = {99--127},
  Volume                   = {10},

  File                     = {:home/moose/GitHub/informatik-2011/Master/Master-Arbeit/paper/NEAT.pdf:PDF},
  Publisher                = {MIT Press},
  Url                      = {http://www.mitpressjournals.org/doi/abs/10.1162/106365602320169811}
}

% Model names are braced so sentence-casing styles keep their capitalisation.
@Article{inception-v4,
  Title                    = {{Inception-v4}, {Inception-ResNet} and the impact of residual connections on learning},
  Author                   = {Szegedy, Christian and Ioffe, Sergey and Vanhoucke, Vincent},
  Journal                  = {arXiv preprint arXiv:1602.07261},
  Year                     = {2016},

  Month                    = feb,

  Url                      = {https://arxiv.org/abs/1602.07261}
}

% CVPR 2015 took place in June 2015; September is the month of the 2014 arXiv preprint linked below.
@InProceedings{GoogleNet-Inception,
  Title                    = {Going deeper with convolutions},
  Author                   = {Szegedy, Christian and Liu, Wei and Jia, Yangqing and Sermanet, Pierre and Reed, Scott and Anguelov, Dragomir and Erhan, Dumitru and Vanhoucke, Vincent and Rabinovich, Andrew},
  Booktitle                = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  Year                     = {2015},
  Month                    = jun,
  Organization             = {IEEE},
  Pages                    = {1--9},

  Url                      = {https://arxiv.org/abs/1409.4842}
}

% "Inception" is the architecture's name and is braced so sentence-casing styles keep it capitalised.
@Article{Inception-v3,
  Title                    = {Rethinking the {Inception} architecture for computer vision},
  Author                   = {Szegedy, Christian and Vanhoucke, Vincent and Ioffe, Sergey and Shlens, Jonathon and Wojna, Zbigniew},
  Journal                  = {arXiv preprint arXiv:1512.00567},
  Year                     = {2015},

  Month                    = dec,

  Url                      = {https://arxiv.org/abs/1512.00567v3}
}

@Article{szegedy2013intriguing,
  Author                   = {Szegedy, Christian and Zaremba, Wojciech and Sutskever, Ilya and Bruna, Joan and Erhan, Dumitru and Goodfellow, Ian and Fergus, Rob},
  Title                    = {Intriguing properties of neural networks},
  Journal                  = {arXiv preprint arXiv:1312.6199},
  Year                     = {2013},
  Month                    = dec,
  Url                      = {https://arxiv.org/abs/1312.6199v4}
}

@Article{thoma2017hasyv2,
  Author                   = {Thoma, Martin},
  Title                    = {The {HASYv2} dataset},
  Journal                  = {arXiv preprint arXiv:1701.08380},
  Year                     = {2017},
  Month                    = jan,
  Url                      = {https://arxiv.org/abs/1701.08380}
}

@Misc{thoma-msthesis-blog,
  Author                   = {Martin Thoma},
  Title                    = {Master Thesis (Blog post)},
  Year                     = {2017},
  Month                    = apr,
  Url                      = {https://martin-thoma.com/msthesis}
}

@Article{Thoma:2016,
  Author                   = {Martin Thoma},
  Title                    = {A Survey of Semantic Segmentation},
  Journal                  = {arXiv preprint arXiv:1602.06541},
  Year                     = {2016},
  Month                    = feb,
  Url                      = {https://arxiv.org/abs/1602.06541}
}

@Misc{Thom2014,
  Author                   = {Martin Thoma},
  Title                    = {The {Twiddle} Algorithm},
  Year                     = {2014},
  Month                    = sep,
  Url                      = {https://martin-thoma.com/twiddle/}
}

% Thesis entry: the thesis type lets styles print School, Type and Address, which Misc silently ignores.
% Type overrides the default "Master's thesis" label.
@MastersThesis{Thoma:2014,
  Title                    = {On-line Recognition of Handwritten Mathematical Symbols},

  Author                   = {Martin Thoma},
  Month                    = nov,
  Year                     = {2014},

  Address                  = {Karlsruhe, Germany},
  Keywords                 = {handwriting recognition; on-line; machine learning;
 artificial neural networks; mathematics; classification;
 supervised learning; MLP; multilayer perceptrons; hwrt;
 write-math},
  School                   = {Karlsruhe Institute of Technology},
  Type                     = {{B.S. Thesis}},
  Url                      = {http://martin-thoma.com/write-math}
}

@Article{tieleman2012lecture,
  Author                   = {Tieleman, Tijmen and Hinton, Geoffrey},
  Title                    = {Lecture 6.5-rmsprop: Divide the gradient by a running average of its recent magnitude},
  Journal                  = {COURSERA: Neural Networks for Machine Learning},
  Volume                   = {4},
  Number                   = {2},
  Year                     = {2012},
  Url                      = {http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf}
}

@Article{verbancsics2013generative,
  Author                   = {Verbancsics, Phillip and Harguess, Josh},
  Title                    = {Generative neuroevolution for deep learning},
  Journal                  = {arXiv preprint arXiv:1312.5355},
  Year                     = {2013},
  Month                    = dec,
  Url                      = {https://arxiv.org/abs/1312.5355}
}

@Article{vorontsov2017orthogonality,
  Author                   = {Vorontsov, Eugene and Trabelsi, Chiheb and Kadoury, Samuel and Pal, Chris},
  Title                    = {On orthogonality and learning recurrent networks with long term dependencies},
  Journal                  = {arXiv preprint arXiv:1702.00071},
  Year                     = {2017},
  Month                    = jan,
  Url                      = {https://arxiv.org/abs/1702.00071}
}

% Volume 37, issue 3 is the March 1989 issue; the journal name is stored with its proper capitalisation.
@Article{waibel1989phoneme,
  Title                    = {Phoneme recognition using time-delay neural networks},
  Author                   = {Waibel, Alex and Hanazawa, Toshiyuki and Hinton, Geoffrey and Shikano, Kiyohiro and Lang, Kevin J},
  Journal                  = {IEEE Transactions on Acoustics, Speech, and Signal Processing},
  Year                     = {1989},

  Month                    = mar,
  Number                   = {3},
  Pages                    = {328--339},
  Volume                   = {37},

  Publisher                = {IEEE},
  Url                      = {http://ieeexplore.ieee.org/document/21701/}
}

% The fourth author is Yann LeCun; automated exports often split the surname into "Cun, Yann L".
@InProceedings{wan2013regularization,
  Title                    = {Regularization of neural networks using {DropConnect}},
  Author                   = {Wan, Li and Zeiler, Matthew and Zhang, Sixin and LeCun, Yann and Fergus, Rob},
  Booktitle                = {International Conference on Machine Learning (ICML)},
  Year                     = {2013},
  Number                   = {30},
  Pages                    = {1058--1066},

  Url                      = {http://www.matthewzeiler.com/pubs/icml2013/icml2013.pdf}
}

% Month and Url follow from the arXiv identifier (1612 = December 2016), matching the other arXiv entries in this file.
@Article{wang2016torontocity,
  Title                    = {{TorontoCity}: Seeing the World with a Million Eyes},
  Author                   = {Wang, Shenlong and Bai, Min and Mattyus, Gellert and Chu, Hang and Luo, Wenjie and Yang, Bin and Liang, Justin and Cheverie, Joel and Fidler, Sanja and Urtasun, Raquel},
  Journal                  = {arXiv preprint arXiv:1612.00423},
  Year                     = {2016},

  Month                    = dec,

  Url                      = {https://arxiv.org/abs/1612.00423}
}

% A paper in conference proceedings: InProceedings makes styles print the Booktitle,
% which classic styles ignore for InBook entries.
@InProceedings{Wang2013,
  Title                    = {A Comparative Study of Encoding, Pooling and Normalization Methods for Action Recognition},
  Author                   = {Wang, Xingxing and Wang, LiMin and Qiao, Yu},
  Editor                   = {Lee, Kyoung Mu and Matsushita, Yasuyuki and Rehg, James M. and Hu, Zhanyi},
  Pages                    = {572--585},
  Publisher                = {Springer Berlin Heidelberg},
  Year                     = {2013},

  Address                  = {Berlin, Heidelberg},
  Month                    = nov,
  Number                   = {11},

  Booktitle                = {Asian Conference on Computer Vision (ACCV)},
  Doi                      = {10.1007/978-3-642-37431-9_44},
  ISBN                     = {978-3-642-37431-9},
  Url                      = {http://dx.doi.org/10.1007/978-3-642-37431-9_44}
}

% Ranges (here a double issue) use a double hyphen; the journal name is stored with its proper capitalisation.
@Article{williams1992simple,
  Title                    = {Simple statistical gradient-following algorithms for connectionist reinforcement learning},
  Author                   = {Williams, Ronald J},
  Journal                  = {Machine Learning},
  Year                     = {1992},
  Number                   = {3--4},
  Pages                    = {229--256},
  Volume                   = {8},

  Publisher                = {Springer}
}

% arXiv preprints have no volume, issue number or publisher; only the identifier in Journal locates the paper.
@Article{wu2015deep,
  Title                    = {Deep image: {Scaling} up image recognition},
  Author                   = {Wu, Ren and Yan, Shengen and Shan, Yi and Dang, Qingqing and Sun, Gang},
  Journal                  = {arXiv preprint arXiv:1501.02876},
  Year                     = {2015},

  Month                    = jul,

  Url                      = {https://arxiv.org/abs/1501.02876v4}
}

% Booktitle follows the file's "Full Name (ACRONYM)" convention; titles carry no trailing period.
@InProceedings{xiao2012adversarial,
  Title                    = {Adversarial Label Flips Attack on Support Vector Machines},
  Author                   = {Xiao, Han and Xiao, Huang and Eckert, Claudia},
  Booktitle                = {European Conference on Artificial Intelligence (ECAI)},
  Year                     = {2012},
  Pages                    = {870--875},

  Url                      = {https://www.sec.in.tum.de/assets/Uploads/ecai2.pdf}
}

@InProceedings{xiao2014error,
  Author                   = {Xiao, Tianjun and Zhang, Jiaxing and Yang, Kuiyuan and Peng, Yuxin and Zhang, Zheng},
  Title                    = {Error-driven incremental learning in deep convolutional neural network for large-scale image classification},
  Booktitle                = {International Conference on Multimedia},
  Organization             = {ACM},
  Number                   = {22},
  Pages                    = {177--186},
  Year                     = {2014}
}

@Article{xie2016aggregated,
  Author                   = {Xie, Saining and Girshick, Ross and Doll{\'a}r, Piotr and Tu, Zhuowen and He, Kaiming},
  Title                    = {Aggregated Residual Transformations for Deep Neural Networks},
  Journal                  = {arXiv preprint arXiv:1611.05431},
  Year                     = {2016},
  Month                    = nov,
  Url                      = {https://arxiv.org/abs/1611.05431v1}
}

% Two authors: names are joined by "and"; a comma between people is parsed as a single "Last, First" name.
@Article{XinLi2016,
  Title                    = {Adversarial Examples Detection in Deep Networks with Convolutional Filter Statistics},
  Author                   = {Li, Xin and Li, Fuxin},
  Journal                  = {arXiv preprint arXiv:1612.07767},
  Year                     = {2016},

  Month                    = dec,

  Url                      = {https://arxiv.org/abs/1612.07767}
}

@Article{xu2015empirical,
  Author                   = {Xu, Bing and Wang, Naiyan and Chen, Tianqi and Li, Mu},
  Title                    = {Empirical evaluation of rectified activations in convolutional network},
  Journal                  = {arXiv preprint arXiv:1505.00853},
  Year                     = {2015},
  Month                    = may,
  Url                      = {https://arxiv.org/abs/1505.00853}
}

@Article{xu2011towards,
  Author                   = {Xu, Wei},
  Title                    = {Towards optimal one pass large scale learning with averaged stochastic gradient descent},
  Journal                  = {arXiv preprint arXiv:1107.2490},
  Year                     = {2011},
  Month                    = jul,
  Url                      = {https://arxiv.org/abs/1107.2490},
  File                     = {:home/moose/GitHub/informatik-2011/Master/Master-Arbeit/paper/towards-optimal-one-pass-lsl-with-a-sgd.pdf:PDF}
}

% Three authors: names are joined by "and"; with commas the field is parsed as the Last, Jr, First parts of one person.
@Misc{YannLeCun1998,
  Title                    = {The {MNIST} database of handwritten digits},

  Author                   = {LeCun, Yann and Cortes, Corinna and Burges, Christopher J. C.},
  Year                     = {1998},

  Url                      = {http://yann.lecun.com/exdb/mnist/}
}

@Article{yu2014visualizing,
  Author                   = {Yu, Wei and Yang, Kuiyuan and Bai, Yalong and Yao, Hongxun and Rui, Yong},
  Title                    = {Visualizing and Comparing Convolutional Neural Networks},
  Journal                  = {arXiv preprint arXiv:1412.6631},
  Year                     = {2014},
  Month                    = dec,
  Url                      = {https://arxiv.org/abs/1412.6631}
}

@Article{zagoruyko2016wide,
  Author                   = {Zagoruyko, Sergey and Komodakis, Nikos},
  Title                    = {Wide residual networks},
  Journal                  = {arXiv preprint arXiv:1605.07146},
  Year                     = {2016},
  Month                    = may,
  Url                      = {https://arxiv.org/abs/1605.07146}
}

% The method name is braced so sentence-casing styles do not turn it into "Adadelta".
@Article{zeiler2012adadelta,
  Title                    = {{ADADELTA}: {An} adaptive learning rate method},
  Author                   = {Zeiler, Matthew D},
  Journal                  = {arXiv preprint arXiv:1212.5701},
  Year                     = {2012},

  Month                    = dec,

  Url                      = {https://arxiv.org/abs/1212.5701v1}
}

% ECCV 2014 took place in September 2014; November is the month of the 2013 arXiv preprint linked below.
@InProceedings{zeiler2014visualizing,
  Title                    = {Visualizing and understanding convolutional networks},
  Author                   = {Zeiler, Matthew D and Fergus, Rob},
  Booktitle                = {European Conference on Computer Vision (ECCV)},
  Year                     = {2014},
  Month                    = sep,
  Organization             = {Springer},
  Pages                    = {818--833},

  Url                      = {https://arxiv.org/abs/1311.2901}
}

@Article{zeiler2013stochastic,
  Author                   = {Zeiler, Matthew D and Fergus, Rob},
  Title                    = {Stochastic pooling for regularization of deep convolutional neural networks},
  Journal                  = {arXiv preprint arXiv:1301.3557},
  Year                     = {2013},
  Month                    = jan,
  Url                      = {https://arxiv.org/abs/1301.3557v1}
}

@InCollection{zhai2016doubly,
  Author                   = {Zhai, Shuangfei and Cheng, Yu and Zhang, Zhongfei (Mark) and Lu, Weining},
  Title                    = {Doubly Convolutional Neural Networks},
  Booktitle                = {Advances in Neural Information Processing Systems 29 (NIPS)},
  Editor                   = {D. D. Lee and M. Sugiyama and U. V. Luxburg and I. Guyon and R. Garnett},
  Publisher                = {Curran Associates, Inc.},
  Pages                    = {1082--1090},
  Year                     = {2016},
  Month                    = oct,
  Url                      = {http://papers.nips.cc/paper/6340-doubly-convolutional-neural-networks.pdf}
}

@Article{zhang2016understanding,
  Author                   = {Zhang, Chiyuan and Bengio, Samy and Hardt, Moritz and Recht, Benjamin and Vinyals, Oriol},
  Title                    = {Understanding deep learning requires rethinking generalization},
  Journal                  = {arXiv preprint arXiv:1611.03530},
  Year                     = {2016},
  Month                    = nov,
  Url                      = {https://arxiv.org/abs/1611.03530}
}

% ECCV 2014 took place in September 2014; July is the month of the arXiv preprint linked below.
@InProceedings{zhang2014part,
  Title                    = {Part-based {R-CNNs} for fine-grained category detection},
  Author                   = {Zhang, Ning and Donahue, Jeff and Girshick, Ross and Darrell, Trevor},
  Booktitle                = {European Conference on Computer Vision (ECCV)},
  Year                     = {2014},
  Month                    = sep,
  Organization             = {Springer},
  Pages                    = {834--849},

  Url                      = {https://arxiv.org/abs/1407.3867}
}

% The surname is spelled "LeCun", as in the other entries of this file.
@Article{zhao2015stacked,
  Title                    = {Stacked what-where auto-encoders},
  Author                   = {Zhao, Junbo and Mathieu, Michael and Goroshin, Ross and LeCun, Yann},
  Journal                  = {arXiv preprint arXiv:1506.02351},
  Year                     = {2015},

  Month                    = jun,

  Url                      = {https://arxiv.org/abs/1506.02351v1}
}

% Page ranges use a double hyphen so they typeset as an en dash.
@InProceedings{7280459,
  Title                    = {Improving deep neural networks using softplus units},
  Author                   = {Hao Zheng and Zhanlei Yang and Wenju Liu and Jizhong Liang and Yanpeng Li},
  Booktitle                = {International Joint Conference on Neural Networks (IJCNN)},
  Year                     = {2015},
  Month                    = jul,
  Pages                    = {1--4},

  Abstract                 = {Recently, DNNs have achieved great improvement for acoustic modeling in speech recognition tasks. However, it is difficult to train the models well when the depth grows. One main reason is that when training DNNs with traditional sigmoid units, the derivatives damp sharply while back-propagating between layers, which restrict the depth of model especially with insufficient training data. To deal with this problem, some unbounded activation functions have been proposed to preserve sufficient gradients, including ReLU and softplus. Compared with ReLU, the smoothing and nonzero properties of the in gradient makes softplus-based DNNs perform better in both stabilization and performance. However, softplus-based DNNs have been rarely exploited for the phoneme recognition task. In this paper, we explore the use of softplus units for DNNs in acoustic modeling for context-independent phoneme recognition tasks. The revised RBM pre-training and dropout strategy are also applied to improve the performance of softplus units. Experiments show that, the DNNs with softplus units get significantly performance improvement and uses less epochs to get convergence compared to the DNNs trained with standard sigmoid units and ReLUs.},
  Doi                      = {10.1109/IJCNN.2015.7280459},
  ISSN                     = {2161-4393},
  Keywords                 = {backpropagation;neural nets;speech recognition;DNN data training;ReLU;acoustic modeling;backpropagation;context-independent phoneme recognition tasks;deep neural networks;dropout strategy;revised RBM pre-training;sigmoid units;softplus units;speech recognition tasks;unbounded activation functions;Speech;TIMIT;deep neural networks;dropout;softplus}
}

@Misc{Zhou2016,
  Author                   = {Bolei Zhou},
  Title                    = {Places2 Download},
  Year                     = {2016},
  Url                      = {http://places2.csail.mit.edu/download.html}
}

@Article{zhou2015learning,
  Author                   = {Zhou, Bolei and Khosla, Aditya and Lapedriza, Agata and Oliva, Aude and Torralba, Antonio},
  Title                    = {Learning Deep Features for Discriminative Localization},
  Journal                  = {arXiv preprint arXiv:1512.04150},
  Year                     = {2015},
  Month                    = dec,
  Url                      = {https://arxiv.org/abs/1512.04150}
}

@Article{zhou2016places,
  Author                   = {Zhou, Bolei and Khosla, Aditya and Lapedriza, Agata and Torralba, Antonio and Oliva, Aude},
  Title                    = {Places: {An} Image Database for Deep Scene Understanding},
  Journal                  = {arXiv preprint arXiv:1610.02055},
  Year                     = {2016},
  Month                    = oct,
  Url                      = {https://arxiv.org/abs/1610.02055}
}

@Article{zoph2016neural,
  Author                   = {Zoph, Barret and Le, Quoc V},
  Title                    = {Neural architecture search with reinforcement learning},
  Journal                  = {arXiv preprint arXiv:1611.01578},
  Year                     = {2016},
  Month                    = nov,
  Url                      = {https://arxiv.org/abs/1611.01578}
}

@Misc{Asirra2017,
  Title                    = {Kaggle Cats and Dogs Dataset},
  Year                     = {2017},
  Month                    = oct,
  Url                      = {https://www.microsoft.com/en-us/download/details.aspx?id=54765}
}

@Misc{Lasagne-Dropout,
  Title                    = {Noise layers},
  Year                     = {2017},
  Month                    = jan,
  Url                      = {http://lasagne.readthedocs.io/en/latest/modules/layers/noise.html#lasagne.layers.DropoutLayer}
}

@Misc{tf-dropout,
  Title                    = {tf.nn.dropout},
  Year                     = {2016},
  Month                    = dec,
  Url                      = {https://www.tensorflow.org/api_docs/python/nn/activation_functions_#dropout}
}

@Misc{TF-MNIST-2016,
  Title                    = {{MNIST} For {ML} Beginners},
  Year                     = {2016},
  Month                    = dec,
  Url                      = {https://www.tensorflow.org/tutorials/mnist/beginners/}
}

% The dataset name is braced so sentence-casing styles do not turn it into "Imagenet".
@Misc{ImageNet-download,
  Title                    = {{ImageNet} Large Scale Visual Recognition Challenge 2012 ({ILSVRC2012})},
  Year                     = {2012},

  Url                      = {http://www.image-net.org/challenges/LSVRC/2012/nonpub-downloads}
}

@Misc{newbob,
  Title                    = {The training performed by qnstrn},
  Year                     = {2000},
  Month                    = aug,
  Url                      = {http://www1.icsi.berkeley.edu/Speech/faq/nn-train.html}
}

